# Workdocumentation 2023-03-10
# ceur-ws-stats.sh
#!/bin/bash
# WF 2020-06-02
# script to check CEUR-WS statistics
#
# get all volume directories
#
# Prints the path of every directory below the current working directory
# whose name starts with "Vol-", one path per line, in find order.
# The find output is passed through untouched, so names containing blanks
# or glob characters are neither split nor expanded.
# Returns the exit status of find.
#
volumes() {
  find . -name 'Vol-*' -type d
}
#
# count the static links to the given link in all
# html files found
#
# params:
#   1: l_link - grep pattern (basic regular expression) to look for
#   2: l_nums - file to which the counts are appended
#
# For every regular file named *.html below the current working directory
# the number of lines matching l_link is appended to l_nums, one count
# per line.
#
getLinkCounts() {
  local l_link="$1"
  local l_nums="$2"
  local l_html
  # NUL-delimited traversal keeps file names with blanks or newlines intact
  while IFS= read -r -d '' l_html; do
    # grep -c prints the number of matching lines; its exit status of 1
    # for a count of 0 is expected and deliberately not treated as an error
    grep -c -e "$l_link" -- "$l_html" >> "$l_nums"
  done < <(find . -name '*.html' -type f -print0)
}
# see https://stackoverflow.com/a/9789898/1497139
#
# Print descriptive statistics for a file of numbers as two CSV lines:
# a header line followed by the values
# N,sum,mean,variance,std,SEM,skewness,kurtosis.
#
# params:
#   1: l_nums - file with one number per line; the first field of every
#               non-blank line is used
#
# Variance is the population variance (divided by N); kurtosis is the
# excess kurtosis (minus 3). Skewness and kurtosis are reported as "nan"
# when the standard deviation is 0, because both are undefined without
# spread. When the file holds no values a message goes to stderr and the
# function returns non-zero without printing any CSV.
#
calcStatistics() {
  local l_nums="$1"
  awk '
    NF {
      sum += $1
      val[++n] = $1
    }
    END {
      if (n == 0) {
        print "calcStatistics: no values found" > "/dev/stderr"
        exit 1
      }
      mean = sum / n
      # second pass over the stored values for the central moments
      for (i = 1; i <= n; i++) {
        delta = val[i] - mean
        d2 += delta ^ 2
        d3 += delta ^ 3
        d4 += delta ^ 4
      }
      va = d2 / n    # variance
      sd = sqrt(va)  # standard deviation
      if (sd > 0) {
        sk = (d3 / n) / sd ^ 3      # skewness
        ku = (d4 / n) / sd ^ 4 - 3  # standardized kurtosis
      } else {
        # guard against the division by zero for constant input
        sk = "nan"
        ku = "nan"
      }
      print "N,sum,mean,variance,std,SEM,skewness,kurtosis"
      print n "," sum "," mean "," va "," sd "," sd / sqrt(n) "," sk "," ku
    }
  ' "$l_nums"
}
#
# Print the line count and the statistics of the static link counts.
#
# The per-file counts are cached in /tmp/ceur-ws-staticlinks.txt. They are
# only collected (via getLinkCounts for "http://ceur-ws.org") when that file
# does not exist yet; remove the file to force a recount.
#
linkStats() {
  # local, so the name does not collide with variables of other functions
  local l_nums=/tmp/ceur-ws-staticlinks.txt
  if [[ ! -f "$l_nums" ]]; then
    echo "getting link counts"
    getLinkCounts "http://ceur-ws.org" "$l_nums"
  fi
  wc -l "$l_nums"
  calcStatistics "$l_nums"
}
#
# check getting the versions
#
# Collects every line containing CEURVERSION from the index.html of each
# volume (as listed by volumes) in /tmp/ceur-ws-versions-grep.txt and
# prints the line count of that file.
#
versioncheck() {
  local l_tmp=/tmp/ceur-ws-versions-grep.txt
  local l_vol
  # truncate instead of rm: no complaint when the file does not exist yet,
  # and wc still has a file to count when there are no volumes
  : > "$l_tmp"
  while IFS= read -r l_vol; do
    grep -e CEURVERSION -- "$l_vol/index.html" >> "$l_tmp"
  done < <(volumes)
  wc -l "$l_tmp"
}
#
# get the list of versions
#
# Extracts the CEURVERSION value from the first five lines of each
# volume's index.html, collects the values in /tmp/ceur-ws-versions.txt and
# prints a MediaWiki table with one row per distinct version (row number,
# version, number of volumes) followed by a sum row.
#
versionlist() {
  local l_tmp=/tmp/ceur-ws-versions.txt
  local l_vol
  # truncate instead of rm: no complaint when the file does not exist yet
  : > "$l_tmp"
  while IFS= read -r l_vol; do
    # take the text between "=" and the next blank; values still glued to
    # the closing "-->" are dropped; awk trims surrounding whitespace
    head -n 5 "$l_vol/index.html" \
      | grep CEURVERSION \
      | cut -f2 -d'=' \
      | cut -f1 -d" " \
      | sed '/-->/d' \
      | awk '{$1=$1;print}' >> "$l_tmp"
  done < <(volumes)
  sort "$l_tmp" | uniq -c | awk '
    BEGIN {
      printf("{| class=\"wikitable\"\n")
      print "|-"
      print "! # !! version !! volumes"
    }
    {
      # uniq -c emits: count version
      count = $1
      sum += count
      version = $2
      print "|-"
      printf("| %2d || %s || %4d \n", NR, version, count)
    }
    END {
      print "|-"
      printf("| || %s || %4d \n", "sum", sum)
      print "|}"
    }
  '
}
#
# count the volumes
#
# Writes the list of volume directories (one per line, as printed by
# volumes) to /tmp/ceur-ws-volumes.txt, replacing any previous content,
# and prints the line count of that file.
#
volcount() {
  local l_nums=/tmp/ceur-ws-volumes.txt
  # a single truncating redirect replaces rm plus per-line appends: the
  # file always exists afterwards and the names are not glob-expanded
  volumes > "$l_nums"
  wc -l "$l_nums"
}
#
# search by pattern
#
# params:
#   1: l_pattern - grep pattern (basic regular expression)
#
# For every volume whose index.html contains a line matching l_pattern one
# line "<volume>:<matches>" is printed. All matching lines of a volume are
# flattened onto that single output line with runs of whitespace collapsed
# to one blank. The matched text is printed literally; glob characters in
# it (such as "*") are not expanded.
#
grepContent() {
  local l_pattern="$1"
  local l_vol l_found
  local -a l_words
  while IFS= read -r l_vol; do
    if l_found=$(grep -e "$l_pattern" -- "$l_vol/index.html"); then
      # split on whitespace into words without pathname expansion, then
      # join the words with single blanks
      read -r -d '' -a l_words <<< "$l_vol:$l_found"
      printf '%s\n' "${l_words[*]}"
    fi
  done < <(volumes)
}
#
# count papers
#
# Prints the path of every *.pdf found below each volume directory listed
# in /tmp/ceur-ws-volumes.txt, one path per line (pipe the output through
# "wc -l" to get the number). When the volume list does not exist yet it is
# created with volcount first; the summary line volcount prints is sent to
# stderr so that stdout carries pdf paths only.
#
papercount() {
  local l_nums=/tmp/ceur-ws-volumes.txt
  local l_vol
  if [[ ! -f "$l_nums" ]]; then
    volcount >&2
  fi
  while IFS= read -r l_vol; do
    # an empty line would make find fail on an empty start path
    [[ -n "$l_vol" ]] || continue
    find "$l_vol" -name '*.pdf'
  done < "$l_nums"
}
# Entry point: the calls below select which statistic is run. Only
# versionlist is currently active; the others are kept commented out.
# uncomment the statistics you'd like to apply
# grepContent: print the volumes whose index.html matches the given pattern
#grepContent "Christoph Lange"
# volcount: store the volume list in /tmp/ceur-ws-volumes.txt and print its line count
#volcount
# papercount: list the *.pdf files found below the volume directories
#papercount
# versionlist: print a wikitable of the CEURVERSION values and their volume counts
versionlist
# versioncheck: print the number of CEURVERSION lines found in the volumes' index.html
#versioncheck
# linkStats: print statistics on the per-file counts of http://ceur-ws.org links
#linkStats